###############################################################
### Replication Code: Dictionary-Based Text Analysis        ###
### Title: Can Conservatives Be Persuaded to Support UBI?   ###
### Author: Eddy S. F. Yeung                                ###
### Version: September 6, 2022                              ###
###############################################################

# Set-up ----
## Clean the R environment and set the working directory
## rm() deletes every object that ls() lists in the current environment, so
## any unsaved objects in the workspace are lost. It does not detach packages
## or reset options.
rm(list = ls())
## load("cleaned.RData") further down uses a relative path, so the directory
## set here must contain that file.
setwd("~/Desktop/UBI/PB_replication") # change to your own directory

## Load the required packages
library(tidyverse) # version 1.3.1
library(quanteda)  # version 3.2.0
library(extrafont) # version 0.17

## Import the cleaned dataset
load("cleaned.RData")

## Build the analysis sample in one pipeline:
##   * text  - the open-ended answer matching the respondent's support score
##             (4-6: "favor" item, 3: "neutral" item, 0-2: "oppose" item,
##             anything else: NA)
##   * group - stored as a factor
##   * rows  - conservatives only
df <- df %>%
  mutate(
    text = ifelse(
      support >= 4 & support <= 6,
      open.ended.favor,
      ifelse(
        support == 3,
        open.ended.neutral,
        ifelse(support >= 0 & support <= 2, open.ended.oppose, NA)
      )
    ),
    group = as.factor(group)
  ) %>%
  filter(conservative == 1)  # analyze conservatives only

# Process the text data ----
## Create a dictionary
## One key ("stimulus") groups the three target words, so a later lookup
## reports a single count per response.
myDict <- dictionary(list(stimulus = c("stimulus", "package", "check")))

## Tokenization and normalization
## Tokenize first, then build the document-feature matrix with lower-casing.
## Calling dfm() directly on a character vector and passing `stem` are both
## deprecated since quanteda 3.0; stemming stays off, as before (use
## dfm_wordstem() if it is ever needed).
## NOTE(review): responses with NA text are presumably treated as empty
## documents by tokens() -- confirm against the quanteda version in use.
dtm <- dfm(tokens(df$text), tolower = TRUE)

# Count how many respondents mention either the word "stimulus," "package," or "check" ----
## Collapse the matrix to the dictionary key, convert it to a data frame, and
## recode the per-response count to a 0/1 indicator (1 = at least one match).
dtm <- dtm %>%
  dfm_lookup(dictionary = myDict) %>%
  convert(to = "data.frame") %>%
  mutate(stimulus = as.numeric(stimulus > 0))

## Number of responses with at least one dictionary match
sum(dtm$stimulus)